%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% FIRST STAGE REGRESSION

% "DEFORESTATION IN THE AMAZON:
% A UNIFIED FRAMEWORK FOR ESTIMATION AND POLICY ANALYSIS"

% by Eduardo Souza-Rodrigues

% This version: November 2018

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% First Stage Regression:
% TC = D*beta + X*gamma + eps

% where:
% TC            = transportation cost to the nearest port
% X             = covariates
% eps           = municipality-level unobservable 
% D             = [D1 D2], Euclidean distances to: 
%                 (i) nearest port and (ii) nearest capital,
%                  D is the IV for TC
% (beta, gamma) = parameters

% Unit of observation: municipalitites in the Brazilian Amazon

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% (A) PREPARE THE DATA SET

% Load the Data Set - Use Small-Medium farms because it is contains all municipalities
data1 = [data_folder,'\land_use_matlab_variables.txt'];
load(data1);
data = land_use_matlab_variables;
clear data1 land_use_matlab_variables;
       
% Name of the variables
Code        = data(:,1);      % Municipality Identifier (IBGE's code)
TC          = data(:,2);      % Transportation cost to the port
Alt         = data(:,3);      % Altitude
Temp        = data(:,4);      % Temperature
Rain        = data(:,5);      % Rain
Slope       = data(:,6);      % Average Slope
Soil        = data(:,7:10);   % Proportion of soil quality = [prop_potenc2 prop_potenc3 prop_potenc4 prop_potenc5]
Pop         = data(:,11);     % Population
Mining      = data(:,12);     % Presence of Mining
Powerplant  = data(:,13);     % Presence of Powerplant
PPNeighbor  = data(:,14);     % Indicator for Powerplant Neighbor
DistPA      = data(:,15);     % Straight Line Distance to nearest Protected Area
Title       = data(:,16);     % Proportion of the Private Area with Land Title
Fines2005   = data(:,17);     % Fines up to 2005
Fines2003   = data(:,18);     % Fines up to 2003
D           = data(:,19:20);  % Straight Line Distances = [dist to port, dist to state capital]
Ibama       = data(:,21);     % Distance to the Nearest Ibama Agency
PCensus     = data(:,22);     % Share Deforested, Census
Psat        = data(:,23);     % Share Deforested, Satellite
PsatNo      = data(:,24);     % Share Deforested, Satelite (No Forest Regrowth)
Pfarm       = data(:,25);     % Share of Farm Area in Municipal Area
Pclouds     = data(:,26);     % Share of Cloud Area in Municipal Area
PsatTotal   = data(:,27);     % Share of all Satellite Land-Use Areas in Municipal Area
G_IR        = data(:,28);     % Clusters, IBGE Immediate Regions
G50         = data(:,29);     % Clusters, G = 50
G75         = data(:,30);     % Clusters, G = 75
G100        = data(:,31);     % Clusters, G = 100
G125        = data(:,32);     % Clusters, G = 125
G150        = data(:,33);     % Clusters, G = 150
Wpop        = data(:,34);     % Spatially Lagged Population
Wmining     = data(:,35);     % Spatially Lagged Mining
Wpowerplant = data(:,36);     % Spatially Lagged Powerplants
WdistPA     = data(:,37);     % Spatially Lagged Proximity to Protected Areas

% Number of observations
T  = size(TC,1);    

% Spatially Lagged Regressors
WX = [Wpop, Wmining, Wpowerplant, WdistPA];

% Instruments
if d_iv == 1
    D = D(:,1); % only dist_port as IV
end

% Set of Covariates (Model Specification):
if model == 0
    
    % Main Specification
    X = [D Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Title Fines2005 Ibama WX ones(T,1)];
    
elseif model == 1
    
    % No Population
    X = [D Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Title Fines2005 Ibama WX ones(T,1)];
   
elseif model == 2
    
    % No land title proxy
    X = [D Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Fines2005 Ibama WX ones(T,1)];

elseif model == 3
    
    % No distance to ibama, nor fines
    X = [D Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Title WX ones(T,1)];

elseif model == 4
    
    % No distance to ibama
    X = [D Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Title Fines2005 WX ones(T,1)];

elseif model == 5
    
    % No Fines
    X = [D Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Title Ibama WX ones(T,1)];

elseif model == 6
   
    % No Spatially Lagged Regressors
    X = [D Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Title Fines2005 Ibama ones(T,1)];
    
end

% Dimension of X
dx = size(X,2);

% Vectors That Are Not Needed Anymore (Exogenous Regressors)
clear Alt Temp Rain Slope Soil Pop Mining Powerplant PPNeighbor DistPA Title
clear PCensus Psat PsatNo Pfarm Pclouds PsatTotal 
clear G_IR G50 G75 G100 G125 G150     


%% (B) RUN REGRESSION

% Estimated Coefficient
beta_ols = (X'*X)\(X'*TC);

% Residuals
U = TC - X*beta_ols;

% Robust Variance Matrix
Var_ols = inv(X'*X)*(X'*diag(U.^2)*X)*inv(X'*X);
   
% Standard Error
se_ols = sqrt(diag(Var_ols));

% t-statistic
t_ols = beta_ols./se_ols;

% 95%-Confidence Interval
LB_ols = beta_ols - critical * sqrt(diag(Var_ols));
UB_ols = beta_ols + critical * sqrt(diag(Var_ols));
   
% F-Statistic (Wald Statistic)
R = [eye(2,2) zeros(2,dx-2)];   
F = (R*beta_ols)'*inv(R*Var_ols*R')*(R*beta_ols);
    
% R^2
R2 = 1 - (U'*U/((TC-mean(TC))'*(TC-mean(TC))));

if d_iv == 1
    F = t_ols(1,1); % only dist_port as IV (so F-stat equals t-stat)
end

%% (C) SHOW ESTIMATED RESULTS

if results_est == 1
    
    if d_iv == 0
        
        % Show the Coefficients, T-Statistics, and Confidence Intervals
        disp('----------------------------------------------------------------------------')
        disp('----------------------------------------------------------------------------')
        disp('FIRST STAGE REGRESSION: TRANSPORT COSTS ON STRAIGHT-LINE DISTANCES')
        table(beta_ols(1:2),se_ols(1:2),t_ols(1:2),'VariableNames',{'Coefficient','se','t'},'RowNames',{'Distance to Port','Distance to Capital'})
        table([T;F;R2],'VariableNames',{'Stats'},'RowNames',{'Number of Observations','F-Statistic','R2'})
        pause
    
    elseif d_iv == 1
        
        % Show the Coefficients, T-Statistics, and Confidence Intervals
        disp('----------------------------------------------------------------------------')
        disp('----------------------------------------------------------------------------')
        disp('FIRST STAGE REGRESSION: TRANSPORT COSTS ON STRAIGHT-LINE DISTANCES')
        table(beta_ols(1),se_ols(1),t_ols(1),'VariableNames',{'Coefficient','se','t'},'RowNames',{'Distance to Port'})
        table([T;F;R2],'VariableNames',{'Stats'},'RowNames',{'Number of Observations','F-Statistic','R2'})
        pause
            
    end
    
end

    